In [1]:
import os
## Set directory
os.chdir('/hpc/group/pbenfeylab/CheWei/CW_data/genesys')
import networkx as nx
from genesys_evaluate_v1 import *
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import warnings
# Suppress all warning messages
warnings.filterwarnings("ignore", category=DeprecationWarning)
/hpc/group/pbenfeylab/ch416/miniconda3/envs/genesys/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html from .autonotebook import tqdm as notebook_tqdm
In [2]:
## Conda Env genesys on DCC
print(torch.__version__)
print(sc.__version__)
1.11.0 1.9.6
In [3]:
## Genes considered/used (shared among samples)
gene_list = pd.read_csv('./gene_list_1108.csv')
Load Data¶
In [4]:
with open("./genesys_root_data.pkl", 'rb') as file_handle:
data = pickle.load(file_handle)
batch_size = 2000
#dataset = Root_Dataset(data['X_test'], data['y_test'])
#loader = DataLoader(dataset,
# batch_size = batch_size,
# shuffle = True, drop_last=True)
X_all = np.vstack((data['X_train'],data['X_val'],data['X_test']))
y_all = pd.concat((data['y_train'],data['y_val'],data['y_test']))
dataset = Root_Dataset(X_all, y_all)
loader = DataLoader(dataset,
batch_size = batch_size,
shuffle = True, drop_last=True)
In [5]:
classes = ['Columella', 'Lateral Root Cap', 'Phloem', 'Xylem', 'Procambium', 'Pericycle', 'Endodermis', 'Cortex', 'Atrichoblast', 'Trichoblast']
class2num = {c: i for (i, c) in enumerate(classes)}
num2class = {i: c for (i, c) in enumerate(classes)}
In [6]:
cts = ['Atrichoblast','Trichoblast','Cortex','Endodermis','Pericycle','Procambium','Xylem','Phloem','Lateral Root Cap','Columella']
ctw = np.zeros((len(cts), 17513, 17513))
## number of cells sampled from the atlas
batch_size = 2000
Extract a sample¶
In [7]:
sample = next(iter(loader))
xo = sample['x'].to(device)
y = sample['y'].to(device)
y_label = [num2class[i] for i in y.tolist()]
In [8]:
xo.shape
Out[8]:
torch.Size([2000, 11, 17513])
In [9]:
len(y_label)
Out[9]:
2000
In [10]:
len(loader)
Out[10]:
55
In [11]:
## GRN for the transition t7 to t9
for ct in cts:
print(ct)
cws = np.zeros((len(loader), 17513, 17513))
with torch.no_grad():
for i, sample in enumerate(loader):
x = sample['x'].to(device)
y = sample['y'].to(device)
y_label = [num2class[i] for i in y.tolist()]
#pred_h = model.init_hidden(batch_size)
#tfrom = model.generate_current(x, pred_h, 0).to('cpu').detach().numpy()
#cfrom = tfrom[np.where(np.array(y_label)==ct)[0],:]
cfrom = x[np.where(np.array(y_label)==ct)[0],7,:]
#pred_h = model.init_hidden(batch_size)
#tto = model.generate_next(x, pred_h, 0).to('cpu').detach().numpy()
#cto = tto[np.where(np.array(y_label)==ct)[0],:]
cto = x[np.where(np.array(y_label)==ct)[0],9,:]
cw = torch.linalg.lstsq(torch.tensor(cfrom), torch.tensor(cto)).solution.detach().numpy()
cws[i] = cw
## Calculate mean across number of repeats
cwm = np.mean(cws, axis=0)
ctw[cts.index(ct)] = cwm
Atrichoblast
/tmp/ipykernel_984347/1398938877.py:21: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor). cw = torch.linalg.lstsq(torch.tensor(cfrom), torch.tensor(cto)).solution.detach().numpy()
Trichoblast Cortex Endodermis Pericycle Procambium Xylem Phloem Lateral Root Cap Columella
In [12]:
# Save the array to disk
np.save('genesys_raw_ctw_t7-t9.npy', ctw)
In [11]:
ctw = np.load('genesys_raw_ctw_t7-t9.npy')
In [12]:
## Calculate z-scores
ctw_z = np.zeros((len(cts), 17513, 17513))
for i in range(len(cts)):
if np.std(ctw[i])==0:
ctw_z[i] = np.zeros((17513, 17513))
else:
ctw_z[i] = (ctw[i] - np.mean(ctw[i])) / np.std(ctw[i])
In [13]:
## Filtering based on z-scores (with no weights)
ctw_f = np.zeros((len(cts), 17513, 17513))
## z-score threshold (keep values > mean + threshold*std)
threshold=3
for i in range(len(cts)):
ctw_f[i] = np.abs(ctw_z[i]) > threshold
Load TFs list¶
In [14]:
wanted_TFs = pd.read_csv("./Kay_TF_thalemine_annotations.csv")
In [15]:
## Make TF names unique and assign preferred names
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G33880"]="WOX9"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G45160"]="SCL27"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G04410"]="NAC78"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G29035"]="ORS1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G02540"]="ZHD3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G16500"]="IAA26"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G09740"]="HAG5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G24660"]="ZHD2"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G46880"]="HDG5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G28420"]="RLT1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G14580"]="BLJ"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G45260"]="BIB"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G02070"]="RVN"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G28160"]="FIT"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G68360"]="GIS3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G20640"]="NLP4"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G05550"]="VFP5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G59470"]="FRF1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G15150"]="HAT7"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G14750"]="WER"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G75710"]="BRON"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G74500"]="TMO7"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT2G12646"]="RITF1"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT3G48100"]="ARR5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G16141"]="GATA17L"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G65640"]="NFL"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G62700"]="VND5"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT4G36160"]="VND2"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G66300"]="VND3"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT1G12260"]="VND4"
wanted_TFs['Name'][wanted_TFs['GeneID']=="AT5G62380"]="VND6"
In [16]:
pd.Series(wanted_TFs['Name']).value_counts().head(5)
Out[16]:
Name NAC001 1 PRE5 1 MYB118 1 MYB21 1 MYB0 1 Name: count, dtype: int64
Network Analysis¶
In [17]:
TFidx = []
for i in wanted_TFs['GeneID']:
if i in gene_list['features'].tolist():
TFidx.append(np.where(gene_list['features']==i)[0][0])
TFidx = np.sort(np.array(TFidx))
In [19]:
def network(i):
## No weights
adj_nw = ctw_f[i]
## Weighted
adj = ctw[i]*ctw_f[i]
## TF only
adj = adj[np.ix_(TFidx,TFidx)]
adj_nw = adj_nw[np.ix_(TFidx,TFidx)]
## Remove no connect
regidx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[0]).value_counts().index[pd.Series(np.where(adj_nw==True)[0]).value_counts()>=1]))
taridx = np.sort(np.array(pd.Series(np.where(adj_nw==True)[1]).value_counts().index[pd.Series(np.where(adj_nw==True)[1]).value_counts()>=1]))
## Reciprocol
keepidx = np.sort(np.array(list(set(regidx).intersection(taridx))))
#keepidx = np.sort(np.array(list(set(regidx).union(taridx))))
TFID = np.array(gene_list['features'][TFidx])[keepidx].tolist()
## TF name to keep
TFname = []
for i in np.array(gene_list['features'][TFidx])[keepidx]:
TFname.append(wanted_TFs['Name'][np.where(wanted_TFs['GeneID']==i)[0][0]])
adj = adj[np.ix_(keepidx,keepidx)]
# Create a NetworkX graph for non-directed edges
G = nx.Graph() # supports directed edges and allows for multiple edges between the same pair of nodes
# Add nodes to the graph
num_nodes = adj.shape[0]
for i, name in enumerate(TFname):
G.add_node(i, name=name)
# Add edges to the graph with weights
for i in range(num_nodes):
for j in range(num_nodes):
weight = adj[i, j]
if weight != 0:
G.add_edge(j, i, weight=abs(weight), distance=1/abs(weight))
## Measures the extent to which how close a node is to all other nodes in the network, considering the shortest paths or geodesic distances between nodes
closeness_centrality = nx.closeness_centrality(G, distance='distance')
## Measures the extent to which a node that are not only well-connected but also connected to other well-connected nodes.
eigenvector_centrality = nx.eigenvector_centrality(G)
# Create a NetworkX graph for diected edges
G = nx.MultiDiGraph() # supports directed edges and allows for multiple edges between the same pair of nodes
# Add nodes to the graph
num_nodes = adj.shape[0]
for i, name in enumerate(TFname):
G.add_node(i, name=name)
# Add edges to the graph with weights
for i in range(num_nodes):
for j in range(num_nodes):
weight = adj[i, j]
if weight != 0:
G.add_edge(j, i, weight=weight)
## Measures the number of connections (edges) each node has
degree_centrality = nx.degree_centrality(G)
# Calculate outgoing centrality
out_centrality = nx.out_degree_centrality(G)
# Calculate incoming centrality
in_centrality = nx.in_degree_centrality(G)
## Measures the extent to which a node lies on the shortest paths between other nodes.
betweenness_centrality = nx.betweenness_centrality(G, weight='weight')
## Non_Reciprocal Out centrality
# Visualize the graph
pos = nx.spring_layout(G) # Positions of the nodes
# Node colors based on weighted betweenness centrality
node_colors = [out_centrality[node] for node in G.nodes()]
# Node sizes based on weighted betweenness centrality
node_sizes = [out_centrality[node] * 1000 for node in G.nodes()]
# Get the edge weights as a dictionary
edge_weights = nx.get_edge_attributes(G, 'weight')
edge_colors = ['red' if weight > 0 else 'blue' for (_, _, weight) in G.edges(data='weight')]
# Scale the edge weights to desired linewidths
max_weight = max(edge_weights.values())
edge_widths = [float(edge_weights[edge]) / max_weight for edge in G.edges]
# Draw the graph
nx.draw(G, pos=pos, node_color=node_colors, node_size=node_sizes, with_labels=False, width=edge_widths, edge_color=edge_colors)
# Add node labels
labels = {node: G.nodes[node]['name'] for node in G.nodes}
nx.draw_networkx_labels(G, pos=pos, labels=labels, font_size=8)
# Add a colorbar to show the weighted betweenness centrality color mapping
sm = plt.cm.ScalarMappable(cmap='viridis', norm=plt.Normalize(vmin=min(node_colors), vmax=max(node_colors)))
sm.set_array([])
plt.colorbar(sm)
# Show the plot
plt.show()
dc = pd.DataFrame.from_dict(degree_centrality, orient='index', columns=['degree_centrality'])
oc = pd.DataFrame.from_dict(out_centrality, orient='index', columns=['out_centrality'])
ic = pd.DataFrame.from_dict(in_centrality, orient='index', columns=['in_centrality'])
bc = pd.DataFrame.from_dict(betweenness_centrality, orient='index', columns=['betweenness_centrality'])
cc = pd.DataFrame.from_dict(closeness_centrality, orient='index', columns=['closeness_centrality'])
ec = pd.DataFrame.from_dict(eigenvector_centrality, orient='index', columns=['eigenvector_centrality'])
df = pd.concat([dc,oc,ic,bc,cc,ec], axis=1)
df.index =TFname
df = df.sort_values('betweenness_centrality', ascending=False)
return(df)
In [20]:
atri = network(0)
In [21]:
atri
Out[21]:
| degree_centrality | out_centrality | in_centrality | betweenness_centrality | closeness_centrality | eigenvector_centrality | |
|---|---|---|---|---|---|---|
| TGA10 | 1.077088 | 0.595289 | 0.481799 | 0.967931 | 0.000342 | 0.182885 |
| IAA1 | 0.126338 | 0.040685 | 0.085653 | 0.955253 | 0.000240 | 0.059468 |
| PHE1 | 0.057816 | 0.029979 | 0.027837 | 0.954554 | 0.000180 | 0.029390 |
| LBD25 | 0.391863 | 0.261242 | 0.130621 | 0.953971 | 0.000252 | 0.109889 |
| AT3G21330 | 0.094218 | 0.059957 | 0.034261 | 0.953824 | 0.000202 | 0.041413 |
| ... | ... | ... | ... | ... | ... | ... |
| JMJ18 | 0.008565 | 0.004283 | 0.004283 | 0.000000 | 0.000178 | 0.008915 |
| AT1G62310 | 0.008565 | 0.004283 | 0.004283 | 0.000000 | 0.000149 | 0.005849 |
| AT5G07810 | 0.025696 | 0.019272 | 0.006424 | 0.000000 | 0.000162 | 0.016625 |
| FBH3 | 0.064240 | 0.017131 | 0.047109 | 0.000000 | 0.000229 | 0.039612 |
| SAP7 | 0.006424 | 0.004283 | 0.002141 | 0.000000 | 0.000163 | 0.004699 |
468 rows × 6 columns
In [22]:
tri = network(1)
In [23]:
cor = network(2)
In [24]:
end = network(3)
In [25]:
per = network(4)
In [26]:
pro = network(5)
In [27]:
xyl = network(6)
In [28]:
phl = network(7)
In [29]:
lrc = network(8)
In [30]:
col = network(9)
In [31]:
atri.columns = ['atri_degree_centrality','atri_out_centrality','atri_in_centrality','atri_betweenness_centrality','atri_closeness_centrality','atri_eigenvector_centrality']
tri.columns = ['tri_degree_centrality','tri_out_centrality','tri_in_centrality','tri_betweenness_centrality','tri_closeness_centrality','tri_eigenvector_centrality']
cor.columns = ['cor_degree_centrality','cor_out_centrality','cor_in_centrality','cor_betweenness_centrality','cor_closeness_centrality','cor_eigenvector_centrality']
end.columns = ['end_degree_centrality','end_out_centrality','end_in_centrality','end_betweenness_centrality','end_closeness_centrality','end_eigenvector_centrality']
per.columns = ['per_degree_centrality','per_out_centrality','per_in_centrality','per_betweenness_centrality','per_closeness_centrality','per_eigenvector_centrality']
pro.columns = ['pro_degree_centrality','pro_out_centrality','pro_in_centrality','pro_betweenness_centrality','pro_closeness_centrality','pro_eigenvector_centrality']
xyl.columns = ['xyl_degree_centrality','xyl_out_centrality','xyl_in_centrality','xyl_betweenness_centrality','xyl_closeness_centrality','xyl_eigenvector_centrality']
phl.columns = ['phl_degree_centrality','phl_out_centrality','phl_in_centrality','phl_betweenness_centrality','phl_closeness_centrality','phl_eigenvector_centrality']
lrc.columns = ['lrc_degree_centrality','lrc_out_centrality','lrc_in_centrality','lrc_betweenness_centrality','lrc_closeness_centrality','lrc_eigenvector_centrality']
col.columns = ['col_degree_centrality','col_out_centrality','col_in_centrality','col_betweenness_centrality','col_closeness_centrality','col_eigenvector_centrality']
In [32]:
## Indentify main regulators in each net work
tff = []
tff = tff + atri[atri['atri_betweenness_centrality']>0].index.tolist()
tff = tff + tri[tri['tri_betweenness_centrality']>0].index.tolist()
tff = tff + lrc[lrc['lrc_betweenness_centrality']>0].index.tolist()
tff = tff + cor[cor['cor_betweenness_centrality']>0].index.tolist()
tff = tff + end[end['end_betweenness_centrality']>0].index.tolist()
tff = tff + per[per['per_betweenness_centrality']>0].index.tolist()
tff = tff + pro[pro['pro_betweenness_centrality']>0].index.tolist()
tff = tff + xyl[xyl['xyl_betweenness_centrality']>0].index.tolist()
tff = tff + phl[phl['phl_betweenness_centrality']>0].index.tolist()
tff = tff + col[col['col_betweenness_centrality']>0].index.tolist()
tf_occurance = pd.DataFrame(pd.Series(tff).value_counts())
tf_occurance = tf_occurance.rename(columns={
'count': 'tf_occurance'
})
tf_spec = pd.concat([tf_occurance, atri, tri, lrc, cor, end, per, pro, xyl, phl, col], axis=1)
tf_spec = tf_spec.fillna(0)
In [33]:
## Epidermis (atri, tri, lrc)
celltype1='atri'
celltype2='tri'
celltype3='lrc'
ts = tf_spec[tf_spec['tf_occurance']==3][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==9].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[33]:
| atri_betweenness_centrality | tri_betweenness_centrality | lrc_betweenness_centrality | atri_out_centrality | tri_out_centrality | lrc_out_centrality | atri_in_centrality | tri_in_centrality | lrc_in_centrality | centrality_count | centrality_sum |
|---|
In [34]:
## atri, tri
celltype1='atri'
celltype2='tri'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[34]:
| atri_betweenness_centrality | tri_betweenness_centrality | atri_out_centrality | tri_out_centrality | atri_in_centrality | tri_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|
| LRL3 | 0.000005 | 0.319964 | 0.006424 | 0.965909 | 0.004283 | 0.920455 | 6 | 8.217039 |
| AT3G05860 | 0.779462 | 0.007809 | 0.284797 | 0.009470 | 0.331906 | 0.049242 | 6 | 7.462685 |
| AT5G06800 | 0.004949 | 0.552962 | 0.029979 | 0.331439 | 0.072805 | 0.134470 | 6 | 7.126604 |
| Rap2.6L | 0.128820 | 0.000783 | 0.171306 | 0.041667 | 0.306210 | 0.160985 | 6 | 6.809771 |
| MC2 | 0.091562 | 0.009958 | 0.149893 | 0.217803 | 0.092077 | 0.123106 | 6 | 6.684400 |
| HB24 | 0.226406 | 0.001362 | 0.137045 | 0.009470 | 0.149893 | 0.013258 | 6 | 6.537434 |
| HFR1 | 0.000482 | 0.011788 | 0.044968 | 0.109848 | 0.132762 | 0.030303 | 6 | 6.330152 |
| NAC003 | 0.002063 | 0.000173 | 0.092077 | 0.047348 | 0.074946 | 0.062500 | 6 | 6.279108 |
In [35]:
## Atrichoblast specific
celltype = 'atri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[35]:
| atri_betweenness_centrality | atri_out_centrality | atri_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| PHE1 | 0.954554 | 0.029979 | 0.027837 | 3 | 4.012370 |
| AT3G21330 | 0.953824 | 0.059957 | 0.034261 | 3 | 4.048042 |
| WRKY13 | 0.947124 | 0.102784 | 0.128480 | 3 | 4.178387 |
| AT2G28710 | 0.942382 | 0.353319 | 0.269807 | 3 | 4.565508 |
| MEA | 0.453713 | 0.130621 | 0.132762 | 3 | 3.717097 |
| AT4G38070 | 0.294368 | 0.017131 | 0.006424 | 3 | 3.317923 |
| GATA17 | 0.212529 | 0.029979 | 0.008565 | 3 | 3.251073 |
| GL2 | 0.105398 | 0.072805 | 0.164882 | 3 | 3.343086 |
| AT4G31650 | 0.098418 | 0.021413 | 0.023555 | 3 | 3.143386 |
| AT5G26749 | 0.049682 | 0.010707 | 0.017131 | 3 | 3.077520 |
| ULT2 | 0.022980 | 0.062099 | 0.040685 | 3 | 3.125764 |
| AT2G24690 | 0.007577 | 0.025696 | 0.128480 | 3 | 3.161753 |
| AT1G14600 | 0.004071 | 0.008565 | 0.102784 | 3 | 3.115420 |
| NAC044 | 0.003138 | 0.027837 | 0.014989 | 3 | 3.045965 |
| AT4G01350 | 0.002573 | 0.085653 | 0.044968 | 3 | 3.133194 |
| AT3G13840 | 0.002141 | 0.002141 | 0.042827 | 3 | 3.047109 |
| AT1G63840 | 0.001595 | 0.074946 | 0.042827 | 3 | 3.119368 |
| TTG2 | 0.001130 | 0.040685 | 0.092077 | 3 | 3.133893 |
| EMB1789 | 0.001002 | 0.004283 | 0.004283 | 3 | 3.009567 |
| AT3G16280 | 0.000800 | 0.023555 | 0.055675 | 3 | 3.080029 |
| WOL | 0.000708 | 0.029979 | 0.049251 | 3 | 3.079937 |
| ZFN3 | 0.000565 | 0.014989 | 0.019272 | 3 | 3.034826 |
| AT2G06025 | 0.000505 | 0.085653 | 0.021413 | 3 | 3.107572 |
| GATA26 | 0.000427 | 0.021413 | 0.044968 | 3 | 3.066809 |
| KELP | 0.000110 | 0.004283 | 0.006424 | 3 | 3.010817 |
| ALY1 | 0.000009 | 0.014989 | 0.014989 | 3 | 3.029988 |
In [36]:
## Trichoblast specific
celltype = 'tri'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[36]:
| tri_betweenness_centrality | tri_out_centrality | tri_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| RAP2.11 | 0.899341 | 0.214015 | 0.034091 | 3 | 4.147447 |
| EIL2 | 0.898780 | 0.096591 | 0.022727 | 3 | 4.018098 |
| AT5G56200 | 0.892592 | 0.225379 | 0.073864 | 3 | 4.191834 |
| RSL2 | 0.892301 | 0.740530 | 0.242424 | 3 | 4.875255 |
| RSL4 | 0.877641 | 0.412879 | 0.111742 | 3 | 4.402263 |
| 4-Sep | 0.841628 | 0.009470 | 0.005682 | 3 | 3.856779 |
| HAT9 | 0.567244 | 0.015152 | 0.005682 | 3 | 3.588077 |
| RHD6 | 0.545361 | 0.494318 | 0.340909 | 3 | 4.380588 |
| AT1G02040 | 0.530274 | 0.005682 | 0.005682 | 3 | 3.541638 |
| AT2G20030 | 0.475073 | 0.215909 | 0.034091 | 3 | 3.725073 |
| NAC016 | 0.208243 | 0.009470 | 0.024621 | 3 | 3.242334 |
| AT4G09100 | 0.115865 | 0.407197 | 0.337121 | 3 | 3.860183 |
| AT2G28920 | 0.114053 | 0.003788 | 0.003788 | 3 | 3.121629 |
| AT5G65130 | 0.109335 | 0.037879 | 0.007576 | 3 | 3.154789 |
| AT2G05160 | 0.068552 | 0.236742 | 0.123106 | 3 | 3.428400 |
| AT1G18335 | 0.013150 | 0.017045 | 0.007576 | 3 | 3.037771 |
| GL3 | 0.006314 | 0.013258 | 0.020833 | 3 | 3.040405 |
| AT2G14760 | 0.003479 | 0.028409 | 0.013258 | 3 | 3.045145 |
| AT2G37120 | 0.001959 | 0.018939 | 0.102273 | 3 | 3.123171 |
| AT2G01060 | 0.000367 | 0.024621 | 0.017045 | 3 | 3.042033 |
| ABF3 | 0.000032 | 0.005682 | 0.022727 | 3 | 3.028441 |
| AT2G39000 | 0.000022 | 0.022727 | 0.020833 | 3 | 3.043582 |
| NAC084 | 0.000007 | 0.013258 | 0.034091 | 3 | 3.047356 |
In [37]:
## LRC specific
celltype = 'lrc'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[37]:
| lrc_betweenness_centrality | lrc_out_centrality | lrc_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| OFP6 | 0.843085 | 0.199659 | 0.116041 | 3 | 4.158785 |
| bZIP52 | 0.795344 | 0.015358 | 0.010239 | 3 | 3.820942 |
| AT3G46070 | 0.312657 | 0.003413 | 0.011945 | 3 | 3.328016 |
| AGL17 | 0.147230 | 0.008532 | 0.046075 | 3 | 3.201838 |
| BNQ3 | 0.033138 | 0.061433 | 0.029010 | 3 | 3.123582 |
| REF6 | 0.030606 | 0.003413 | 0.023891 | 3 | 3.057910 |
| AT4G14720 | 0.013731 | 0.013652 | 0.023891 | 3 | 3.051273 |
| HMGB5 | 0.007754 | 0.001706 | 0.040956 | 3 | 3.050416 |
| BZIP34 | 0.003378 | 0.027304 | 0.035836 | 3 | 3.066518 |
| HB4 | 0.002879 | 0.008532 | 0.032423 | 3 | 3.043835 |
| LSMT-L | 0.002135 | 0.001706 | 0.027304 | 3 | 3.031146 |
| AT1G58220 | 0.001701 | 0.003413 | 0.010239 | 3 | 3.015353 |
| AL6 | 0.000977 | 0.011945 | 0.029010 | 3 | 3.041933 |
| SDIR1 | 0.000744 | 0.001706 | 0.017065 | 3 | 3.019515 |
| ZF1 | 0.000738 | 0.005119 | 0.008532 | 3 | 3.014390 |
| PYE | 0.000651 | 0.032423 | 0.042662 | 3 | 3.075736 |
| AT3G19080 | 0.000598 | 0.001706 | 0.005119 | 3 | 3.007424 |
| HAM3 | 0.000478 | 0.104096 | 0.052901 | 3 | 3.157475 |
| AT5G41580 | 0.000146 | 0.008532 | 0.034130 | 3 | 3.042808 |
| AT1G26680 | 0.000012 | 0.001706 | 0.030717 | 3 | 3.032435 |
| TLP3 | 0.000012 | 0.010239 | 0.090444 | 3 | 3.100694 |
| AT5G58340 | 0.000012 | 0.001706 | 0.013652 | 3 | 3.015370 |
| VOZ2 | 0.000006 | 0.001706 | 0.022184 | 3 | 3.023897 |
In [38]:
## Columella specific
celltype = 'col'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[38]:
| col_betweenness_centrality | col_out_centrality | col_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| SUVH6 | 0.245093 | 0.018519 | 0.107143 | 3 | 3.370754 |
| SMZ | 0.064750 | 0.042328 | 0.063492 | 3 | 3.170570 |
| MYB4R1 | 0.038030 | 0.047619 | 0.062169 | 3 | 3.147819 |
| NAM | 0.030315 | 0.455026 | 0.212963 | 3 | 3.698304 |
| AT1G77570 | 0.016572 | 0.018519 | 0.091270 | 3 | 3.126360 |
| ... | ... | ... | ... | ... | ... |
| bHLH11 | 0.000005 | 0.002646 | 0.015873 | 3 | 3.018524 |
| AIL5 | 0.000004 | 0.006614 | 0.006614 | 3 | 3.013231 |
| MBD10 | 0.000004 | 0.023810 | 0.047619 | 3 | 3.071432 |
| SPL13A | 0.000002 | 0.007937 | 0.001323 | 3 | 3.009261 |
| HD2C | 0.000002 | 0.010582 | 0.033069 | 3 | 3.043653 |
134 rows × 5 columns
In [39]:
## Ground tissue
celltype1='cor'
celltype2='end'
ts = tf_spec[tf_spec['tf_occurance']==2][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==6].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[39]:
| cor_betweenness_centrality | end_betweenness_centrality | cor_out_centrality | end_out_centrality | cor_in_centrality | end_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|
| MYB122 | 0.069754 | 0.735343 | 0.165394 | 0.285307 | 0.058524 | 0.206847 | 6 | 7.521170 |
| IDD4 | 0.003375 | 0.049073 | 0.147583 | 0.057061 | 0.106870 | 0.042796 | 6 | 6.406758 |
| JKD | 0.005057 | 0.000919 | 0.104326 | 0.084165 | 0.078880 | 0.102710 | 6 | 6.376058 |
| ABA1 | 0.007205 | 0.000004 | 0.045802 | 0.019971 | 0.043257 | 0.048502 | 6 | 6.164741 |
In [40]:
## Cortex specific
celltype = 'cor'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[40]:
| cor_betweenness_centrality | cor_out_centrality | cor_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| IDD7 | 0.782923 | 0.129771 | 0.053435 | 3 | 3.966129 |
| AT3G51120 | 0.390877 | 0.010178 | 0.002545 | 3 | 3.403600 |
| AT1G72210 | 0.296055 | 0.198473 | 0.315522 | 3 | 3.810050 |
| CRF5 | 0.156969 | 0.002545 | 0.002545 | 3 | 3.162058 |
| AT2G37000 | 0.041414 | 0.005089 | 0.017812 | 3 | 3.064314 |
| AT2G38300 | 0.040855 | 0.096692 | 0.259542 | 3 | 3.397089 |
| RGL3 | 0.040011 | 0.012723 | 0.137405 | 3 | 3.190139 |
| COL4 | 0.005881 | 0.109415 | 0.099237 | 3 | 3.214532 |
| PIL5 | 0.005134 | 0.010178 | 0.002545 | 3 | 3.017857 |
| MYB14 | 0.002519 | 0.005089 | 0.068702 | 3 | 3.076310 |
| ZFN1 | 0.001792 | 0.119593 | 0.063613 | 3 | 3.184998 |
| ERF15 | 0.001448 | 0.017812 | 0.053435 | 3 | 3.072694 |
| SIGF | 0.000227 | 0.002545 | 0.025445 | 3 | 3.028217 |
| BPC4 | 0.000039 | 0.007634 | 0.005089 | 3 | 3.012762 |
In [41]:
## Endodermis specific
celltype = 'end'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[41]:
| end_betweenness_centrality | end_out_centrality | end_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| chr31 | 0.974708 | 0.512126 | 0.228245 | 3 | 4.715078 |
| RVN | 0.914977 | 0.205421 | 0.017118 | 3 | 4.137516 |
| AT1G03040 | 0.913301 | 0.116976 | 0.098431 | 3 | 4.128708 |
| MYB36 | 0.766462 | 0.924394 | 0.901569 | 3 | 5.592425 |
| bZIP58 | 0.685637 | 0.315264 | 0.191155 | 3 | 4.192056 |
| AGL102 | 0.467997 | 0.359486 | 0.057061 | 3 | 3.884545 |
| AT2G46810 | 0.437779 | 0.047076 | 0.001427 | 3 | 3.486281 |
| BLJ | 0.186350 | 0.185449 | 0.246790 | 3 | 3.618590 |
| AT4G38340 | 0.177084 | 0.209700 | 0.057061 | 3 | 3.443846 |
| SCR | 0.134273 | 0.105563 | 0.057061 | 3 | 3.296898 |
| SIGA | 0.093587 | 0.148359 | 0.027104 | 3 | 3.269050 |
| MYB68 | 0.077120 | 0.184023 | 0.365193 | 3 | 3.626336 |
| MYB3 | 0.019295 | 0.021398 | 0.189729 | 3 | 3.230422 |
| AT2G33720 | 0.004540 | 0.125535 | 0.012839 | 3 | 3.142914 |
| AT4G00940 | 0.003104 | 0.176890 | 0.152639 | 3 | 3.332633 |
| AT5G41920 | 0.002317 | 0.085592 | 0.025678 | 3 | 3.113587 |
| AT2G38090 | 0.000467 | 0.024251 | 0.011412 | 3 | 3.036130 |
| AT4G00390 | 0.000247 | 0.001427 | 0.007133 | 3 | 3.008806 |
| ERF3 | 0.000122 | 0.032810 | 0.031384 | 3 | 3.064316 |
| ATU2AF35A | 0.000084 | 0.009986 | 0.012839 | 3 | 3.022908 |
| AP3 | 0.000053 | 0.024251 | 0.012839 | 3 | 3.037143 |
| AT4G11680 | 0.000033 | 0.094151 | 0.015692 | 3 | 3.109876 |
| ZIM | 0.000012 | 0.005706 | 0.012839 | 3 | 3.018557 |
| BIB | 0.000004 | 0.074180 | 0.009986 | 3 | 3.084170 |
| YY1 | 0.000002 | 0.002853 | 0.014265 | 3 | 3.017120 |
In [42]:
## Stele
celltype1='per'
celltype2='pro'
celltype3='xyl'
celltype4='phl'
ts = tf_spec[tf_spec['tf_occurance']==4][[celltype1+'_betweenness_centrality', celltype2+'_betweenness_centrality', celltype3+'_betweenness_centrality', celltype4+'_betweenness_centrality', celltype1+'_out_centrality', celltype2+'_out_centrality', celltype3+'_out_centrality', celltype4+'_out_centrality', celltype1+'_in_centrality', celltype2+'_in_centrality', celltype3+'_in_centrality', celltype4+'_in_centrality']]
tso = (ts > 0)
ts['centrality_count'] = tso.sum(axis=1)
ts['centrality_sum'] = ts.sum(axis=1)
ts[ts['centrality_count']==12].sort_values(['centrality_count','centrality_sum'], ascending=False)
Out[42]:
| per_betweenness_centrality | pro_betweenness_centrality | xyl_betweenness_centrality | phl_betweenness_centrality | per_out_centrality | pro_out_centrality | xyl_out_centrality | phl_out_centrality | per_in_centrality | pro_in_centrality | xyl_in_centrality | phl_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| TMO6 | 0.770435 | 0.535439 | 0.825228 | 0.177098 | 0.091644 | 0.038961 | 0.043956 | 0.187500 | 0.066038 | 0.038961 | 0.021978 | 0.310049 | 12 | 15.107286 |
| MYB20 | 0.956377 | 0.978269 | 0.019980 | 0.000069 | 0.208895 | 0.266955 | 0.057692 | 0.002451 | 0.318059 | 0.180375 | 0.068681 | 0.040441 | 12 | 15.098246 |
| IAA12 | 0.042892 | 0.882128 | 0.605909 | 0.000275 | 0.004043 | 0.676768 | 0.085165 | 0.051471 | 0.005391 | 0.233766 | 0.016484 | 0.030637 | 12 | 14.634929 |
In [43]:
## Pericycle
celltype = 'per'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[43]:
| per_betweenness_centrality | per_out_centrality | per_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| ZFP7 | 0.960866 | 0.020216 | 0.016173 | 3 | 3.997254 |
| BHLH32 | 0.953445 | 0.006739 | 0.028302 | 3 | 3.988485 |
| HB21 | 0.931609 | 0.355795 | 0.121294 | 3 | 4.408698 |
| AT1G26790 | 0.904322 | 0.115903 | 0.036388 | 3 | 4.056613 |
| HAT2 | 0.812881 | 0.138814 | 0.070081 | 3 | 4.021776 |
| GAI | 0.730724 | 0.036388 | 0.029650 | 3 | 3.796761 |
| DEL2 | 0.521156 | 0.008086 | 0.004043 | 3 | 3.533285 |
| LBD39 | 0.419987 | 0.028302 | 0.032345 | 3 | 3.480634 |
| bZIP4 | 0.210199 | 0.099730 | 0.036388 | 3 | 3.346318 |
| ING2 | 0.110154 | 0.006739 | 0.002695 | 3 | 3.119588 |
| AT1G64620 | 0.088969 | 0.032345 | 0.056604 | 3 | 3.177918 |
| ABO3 | 0.083565 | 0.002695 | 0.024259 | 3 | 3.110519 |
| ATL5 | 0.074912 | 0.216981 | 0.049865 | 3 | 3.341758 |
| GATA16 | 0.068515 | 0.039084 | 0.053908 | 3 | 3.161507 |
| AT1G61990 | 0.068153 | 0.001348 | 0.012129 | 3 | 3.081630 |
| NF-YB11 | 0.065785 | 0.002695 | 0.012129 | 3 | 3.080610 |
| AT4G35270 | 0.052564 | 0.133423 | 0.092992 | 3 | 3.278979 |
| CHR18 | 0.046173 | 0.008086 | 0.005391 | 3 | 3.059650 |
| MYBC1 | 0.041968 | 0.265499 | 0.324798 | 3 | 3.632265 |
| ICE1 | 0.040846 | 0.308625 | 0.098383 | 3 | 3.447854 |
| NST1 | 0.023067 | 0.004043 | 0.009434 | 3 | 3.036545 |
| AL5 | 0.013921 | 0.043127 | 0.040431 | 3 | 3.097479 |
| IDD11 | 0.010722 | 0.052561 | 0.018868 | 3 | 3.082150 |
| HB20 | 0.010560 | 0.063342 | 0.039084 | 3 | 3.112986 |
| LBD14 | 0.005394 | 0.012129 | 0.176550 | 3 | 3.194074 |
| LBD29 | 0.004063 | 0.004043 | 0.030997 | 3 | 3.039104 |
| AT5G26610 | 0.003452 | 0.001348 | 0.020216 | 3 | 3.025015 |
| NF-YA8 | 0.003221 | 0.001348 | 0.002695 | 3 | 3.007264 |
| BSM | 0.002855 | 0.002695 | 0.002695 | 3 | 3.008246 |
| AT4G01280 | 0.002022 | 0.002695 | 0.004043 | 3 | 3.008761 |
| LBD38 | 0.001828 | 0.030997 | 0.029650 | 3 | 3.062475 |
| AT2G40200 | 0.001391 | 0.008086 | 0.021563 | 3 | 3.031041 |
| AT1G61980 | 0.001348 | 0.001348 | 0.057951 | 3 | 3.060647 |
| GATA23 | 0.001348 | 0.002695 | 0.044474 | 3 | 3.048518 |
| AGL26 | 0.000378 | 0.013477 | 0.040431 | 3 | 3.054287 |
| SCL21 | 0.000262 | 0.001348 | 0.021563 | 3 | 3.023173 |
| AT5G46915 | 0.000218 | 0.001348 | 0.012129 | 3 | 3.013695 |
| AtHB23 | 0.000120 | 0.029650 | 0.040431 | 3 | 3.070201 |
| NF-YB2 | 0.000102 | 0.079515 | 0.026954 | 3 | 3.106571 |
| GATA4 | 0.000091 | 0.002695 | 0.006739 | 3 | 3.009525 |
| AT2G41710 | 0.000089 | 0.004043 | 0.082210 | 3 | 3.086342 |
| AT1G44810 | 0.000060 | 0.028302 | 0.045822 | 3 | 3.074184 |
| AT4G30180 | 0.000015 | 0.001348 | 0.016173 | 3 | 3.017535 |
| HDA3 | 0.000004 | 0.008086 | 0.068733 | 3 | 3.076823 |
In [44]:
## Procambium
celltype = 'pro'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[44]:
| pro_betweenness_centrality | pro_out_centrality | pro_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| AT3G06590 | 0.967585 | 0.148629 | 0.102453 | 3 | 4.218667 |
| BZIP24 | 0.961902 | 0.017316 | 0.020202 | 3 | 3.999420 |
| HB18 | 0.893226 | 0.109668 | 0.072150 | 3 | 4.075044 |
| AT4G23860 | 0.796827 | 0.007215 | 0.004329 | 3 | 3.808371 |
| AT1G26610 | 0.738731 | 0.011544 | 0.005772 | 3 | 3.756047 |
| AT4G03250 | 0.437924 | 0.007215 | 0.014430 | 3 | 3.459569 |
| HSFB4 | 0.331490 | 0.051948 | 0.030303 | 3 | 3.413741 |
| ARR15 | 0.306742 | 0.001443 | 0.004329 | 3 | 3.312514 |
| TTR1 | 0.200075 | 0.001443 | 0.002886 | 3 | 3.204404 |
| RGL1 | 0.083976 | 0.028860 | 0.004329 | 3 | 3.117165 |
| AT1G69570 | 0.080176 | 0.007215 | 0.008658 | 3 | 3.096049 |
| HAT3 | 0.061941 | 0.090909 | 0.060606 | 3 | 3.213456 |
| AT2G17150 | 0.041722 | 0.007215 | 0.001443 | 3 | 3.050380 |
| ZML1 | 0.019099 | 0.017316 | 0.012987 | 3 | 3.049402 |
| SHY2 | 0.014601 | 0.046176 | 0.030303 | 3 | 3.091080 |
| AGL58 | 0.013043 | 0.036075 | 0.008658 | 3 | 3.057776 |
| DOF1 | 0.011901 | 0.025974 | 0.073593 | 3 | 3.111468 |
| MYB60 | 0.009179 | 0.007215 | 0.050505 | 3 | 3.066899 |
| MYB10 | 0.007198 | 0.001443 | 0.017316 | 3 | 3.025957 |
| HAT1 | 0.004848 | 0.046176 | 0.041847 | 3 | 3.092871 |
| BZR1 | 0.002901 | 0.023088 | 0.025974 | 3 | 3.051963 |
| AT4G27900 | 0.002880 | 0.002886 | 0.004329 | 3 | 3.010095 |
| CRF12 | 0.002673 | 0.015873 | 0.024531 | 3 | 3.043077 |
| AT2G34450 | 0.001879 | 0.001443 | 0.002886 | 3 | 3.006208 |
| LRP1 | 0.001733 | 0.010101 | 0.017316 | 3 | 3.029150 |
| BZIP61 | 0.001629 | 0.014430 | 0.063492 | 3 | 3.079551 |
| AT4G27240 | 0.001458 | 0.025974 | 0.027417 | 3 | 3.054849 |
| BES1 | 0.000584 | 0.057720 | 0.027417 | 3 | 3.085721 |
| AT3G50650 | 0.000571 | 0.015873 | 0.004329 | 3 | 3.020773 |
| AT2G03470 | 0.000461 | 0.002886 | 0.011544 | 3 | 3.014891 |
| TCP20 | 0.000423 | 0.046176 | 0.021645 | 3 | 3.068244 |
| TCP8 | 0.000402 | 0.027417 | 0.023088 | 3 | 3.050908 |
| NF-YB10 | 0.000156 | 0.031746 | 0.031746 | 3 | 3.063648 |
| CHR38 | 0.000102 | 0.021645 | 0.004329 | 3 | 3.026076 |
| PIE1 | 0.000100 | 0.010101 | 0.010101 | 3 | 3.020302 |
| NF-YA4 | 0.000098 | 0.017316 | 0.012987 | 3 | 3.030401 |
| AT5G38840 | 0.000035 | 0.012987 | 0.008658 | 3 | 3.021680 |
| AT2G38950 | 0.000017 | 0.021645 | 0.025974 | 3 | 3.047636 |
| AL3 | 0.000006 | 0.015873 | 0.008658 | 3 | 3.024537 |
| AT5G13780 | 0.000006 | 0.012987 | 0.033189 | 3 | 3.046182 |
| AT2G39020 | 0.000004 | 0.017316 | 0.020202 | 3 | 3.037522 |
| HSF3 | 0.000002 | 0.014430 | 0.002886 | 3 | 3.017318 |
In [45]:
## Xylem
celltype = 'xyl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[45]:
| xyl_betweenness_centrality | xyl_out_centrality | xyl_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| OFP10 | 0.969356 | 0.123626 | 0.131868 | 3 | 4.224851 |
| CRF6 | 0.969129 | 0.120879 | 0.079670 | 3 | 4.169679 |
| MYB83 | 0.967744 | 0.524725 | 0.365385 | 3 | 4.857854 |
| MYB32 | 0.919399 | 0.131868 | 0.071429 | 3 | 4.122695 |
| AT1G66810 | 0.913488 | 0.461538 | 0.250000 | 3 | 4.625026 |
| BEE2 | 0.837768 | 0.090659 | 0.214286 | 3 | 4.142713 |
| AT1G68200 | 0.835732 | 0.087912 | 0.403846 | 3 | 4.327491 |
| VND6 | 0.757931 | 0.159341 | 0.162088 | 3 | 4.079360 |
| VND1 | 0.756546 | 0.112637 | 0.222527 | 3 | 4.091711 |
| MYB46 | 0.751294 | 0.645604 | 0.810440 | 3 | 5.207338 |
| HB31 | 0.748963 | 0.041209 | 0.120879 | 3 | 3.911051 |
| AT3G51470 | 0.741789 | 0.016484 | 0.052198 | 3 | 3.810470 |
| AT1G24610 | 0.677186 | 0.247253 | 0.054945 | 3 | 3.979384 |
| VND7 | 0.643266 | 0.598901 | 0.527473 | 3 | 4.769639 |
| AT4G16610 | 0.526617 | 0.129121 | 0.156593 | 3 | 3.812332 |
| MMD1 | 0.459117 | 0.052198 | 0.035714 | 3 | 3.547029 |
| ZHD3 | 0.433392 | 0.546703 | 0.337912 | 3 | 4.318008 |
| VND4 | 0.265371 | 0.582418 | 0.563187 | 3 | 4.410975 |
| MYB52 | 0.212484 | 0.546703 | 0.189560 | 3 | 3.948748 |
| GATA6 | 0.208806 | 0.076923 | 0.035714 | 3 | 3.321444 |
| AT2G20100 | 0.206680 | 0.230769 | 0.063187 | 3 | 3.500636 |
| LBD31 | 0.195744 | 0.532967 | 0.274725 | 3 | 4.003436 |
| VND5 | 0.143886 | 0.615385 | 0.442308 | 3 | 4.201579 |
| NAC060 | 0.113182 | 0.140110 | 0.178571 | 3 | 3.431864 |
| BZIP17 | 0.106416 | 0.030220 | 0.008242 | 3 | 3.144878 |
| ERF9 | 0.077566 | 0.057692 | 0.002747 | 3 | 3.138006 |
| AT3G22100 | 0.052569 | 0.016484 | 0.002747 | 3 | 3.071799 |
| LBD18 | 0.050941 | 0.620879 | 0.406593 | 3 | 4.078414 |
| HB34 | 0.043305 | 0.118132 | 0.131868 | 3 | 3.293305 |
| JLO | 0.039968 | 0.631868 | 0.310440 | 3 | 3.982275 |
| FRS9 | 0.036668 | 0.005495 | 0.010989 | 3 | 3.053151 |
| SHP1 | 0.025391 | 0.010989 | 0.184066 | 3 | 3.220446 |
| AT5G25470 | 0.023015 | 0.087912 | 0.098901 | 3 | 3.209828 |
| MYB99 | 0.018724 | 0.016484 | 0.208791 | 3 | 3.243998 |
| AT1G24040 | 0.004730 | 0.173077 | 0.043956 | 3 | 3.221763 |
| AT5G46910 | 0.002619 | 0.296703 | 0.118132 | 3 | 3.417454 |
| AT3G04930 | 0.002437 | 0.008242 | 0.030220 | 3 | 3.040898 |
| LDL2 | 0.001544 | 0.002747 | 0.068681 | 3 | 3.072972 |
| PRR9 | 0.001317 | 0.027473 | 0.035714 | 3 | 3.064504 |
| AT2G36480 | 0.000954 | 0.002747 | 0.021978 | 3 | 3.025679 |
| AL2 | 0.000810 | 0.041209 | 0.027473 | 3 | 3.069491 |
| E2F3 | 0.000136 | 0.005495 | 0.010989 | 3 | 3.016620 |
| AT3G45880 | 0.000083 | 0.126374 | 0.027473 | 3 | 3.153929 |
In [46]:
## Phloem
celltype = 'phl'
cs = tf_spec[tf_spec['tf_occurance']==1][[celltype+'_betweenness_centrality', celltype+'_out_centrality',celltype+'_in_centrality']]
cso = (cs > 0)
cs['centrality_count'] = cso.sum(axis=1)
cs['centrality_sum'] = cs.sum(axis=1)
cs[cs['centrality_count']==3].sort_values(['centrality_count',celltype+'_betweenness_centrality'], ascending=False)
Out[46]:
| phl_betweenness_centrality | phl_out_centrality | phl_in_centrality | centrality_count | centrality_sum | |
|---|---|---|---|---|---|
| KNAT2 | 0.959314 | 0.030637 | 0.036765 | 3 | 4.026716 |
| LDL1 | 0.887614 | 0.012255 | 0.011029 | 3 | 3.910899 |
| PIF3 | 0.837399 | 0.074755 | 0.037990 | 3 | 3.950144 |
| BHLH101 | 0.766294 | 0.019608 | 0.018382 | 3 | 3.804284 |
| AGL80 | 0.728083 | 0.018382 | 0.017157 | 3 | 3.763622 |
| GATA20 | 0.696991 | 0.131127 | 0.166667 | 3 | 3.994785 |
| FRS11 | 0.696979 | 0.001225 | 0.008578 | 3 | 3.706783 |
| APRR2 | 0.648838 | 0.056373 | 0.013480 | 3 | 3.718691 |
| FLP | 0.635148 | 0.060049 | 0.182598 | 3 | 3.877795 |
| AT4G37180 | 0.435929 | 0.237745 | 0.223039 | 3 | 3.896713 |
| AT1G55750 | 0.431858 | 0.011029 | 0.024510 | 3 | 3.467397 |
| AT5G63700 | 0.365342 | 0.317402 | 0.286765 | 3 | 3.969509 |
| JAZ11 | 0.255876 | 0.062500 | 0.033088 | 3 | 3.351465 |
| AT2G45460 | 0.201431 | 0.009804 | 0.015931 | 3 | 3.227167 |
| BBX29 | 0.186423 | 0.008578 | 0.002451 | 3 | 3.197453 |
| EMB2219 | 0.076186 | 0.002451 | 0.004902 | 3 | 3.083539 |
| REM22 | 0.050953 | 0.012255 | 0.064951 | 3 | 3.128159 |
| SIGC | 0.034171 | 0.020833 | 0.019608 | 3 | 3.074612 |
| NAC020 | 0.013046 | 0.189951 | 0.321078 | 3 | 3.524075 |
| AGL15 | 0.011790 | 0.172794 | 0.207108 | 3 | 3.391692 |
| AT5G44080 | 0.008048 | 0.011029 | 0.025735 | 3 | 3.044812 |
| AT2G20280 | 0.006467 | 0.045343 | 0.028186 | 3 | 3.079997 |
| AT1G03150 | 0.004649 | 0.026961 | 0.006127 | 3 | 3.037738 |
| AT4G25610 | 0.004490 | 0.008578 | 0.001225 | 3 | 3.014294 |
| AT5G63080 | 0.002607 | 0.022059 | 0.014706 | 3 | 3.039372 |
| WRKY20 | 0.002433 | 0.023284 | 0.034314 | 3 | 3.060031 |
| ETL1 | 0.002422 | 0.026961 | 0.015931 | 3 | 3.045315 |
| NAC086 | 0.001367 | 0.066176 | 0.074755 | 3 | 3.142298 |
| LUH | 0.001216 | 0.051471 | 0.023284 | 3 | 3.075971 |
| AN3 | 0.000922 | 0.003676 | 0.001225 | 3 | 3.005824 |
| PCFS4 | 0.000304 | 0.018382 | 0.035539 | 3 | 3.054225 |
| AT1G10610 | 0.000298 | 0.008578 | 0.030637 | 3 | 3.039513 |
| DDL | 0.000256 | 0.002451 | 0.006127 | 3 | 3.008834 |
| AT1G10120 | 0.000217 | 0.024510 | 0.050245 | 3 | 3.074971 |
| NAC045 | 0.000146 | 0.042892 | 0.061275 | 3 | 3.104313 |
| GBF2 | 0.000128 | 0.042892 | 0.036765 | 3 | 3.079785 |
| BPC3 | 0.000116 | 0.012255 | 0.024510 | 3 | 3.036880 |
| ROS4 | 0.000104 | 0.034314 | 0.025735 | 3 | 3.060153 |
| AT2G40620 | 0.000093 | 0.008578 | 0.040441 | 3 | 3.049113 |
| BPEp | 0.000086 | 0.026961 | 0.014706 | 3 | 3.041752 |
| PAT1 | 0.000084 | 0.036765 | 0.033088 | 3 | 3.069937 |
| SOL1 | 0.000068 | 0.042892 | 0.033088 | 3 | 3.076048 |
| AT3G11450 | 0.000029 | 0.037990 | 0.018382 | 3 | 3.056401 |
| AT5G63280 | 0.000011 | 0.018382 | 0.009804 | 3 | 3.028197 |
| AT4G25210 | 0.000003 | 0.039216 | 0.039216 | 3 | 3.078434 |
| AT3G20010 | 0.000003 | 0.019608 | 0.015931 | 3 | 3.035542 |
| AT1G75510 | 0.000002 | 0.040441 | 0.036765 | 3 | 3.077207 |
| MBD13 | 0.000002 | 0.004902 | 0.028186 | 3 | 3.033090 |
| AT2G37650 | 0.000002 | 0.013480 | 0.002451 | 3 | 3.015933 |
In [47]:
tf_spec.to_csv('scRNAseq_TF_GRN_centrality_t7-t9_zscore3.csv', index=True)
In [48]:
tf_spec
Out[48]:
| tf_occurance | atri_degree_centrality | atri_out_centrality | atri_in_centrality | atri_betweenness_centrality | atri_closeness_centrality | atri_eigenvector_centrality | tri_degree_centrality | tri_out_centrality | tri_in_centrality | ... | phl_in_centrality | phl_betweenness_centrality | phl_closeness_centrality | phl_eigenvector_centrality | col_degree_centrality | col_out_centrality | col_in_centrality | col_betweenness_centrality | col_closeness_centrality | col_eigenvector_centrality | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AT5G57150 | 7.0 | 0.070664 | 0.023555 | 0.047109 | 0.000018 | 0.000227 | 0.042571 | 0.000000 | 0.000000 | 0.000000 | ... | 0.056373 | 0.000983 | 0.000371 | 0.044610 | 0.013228 | 0.007937 | 0.005291 | 0.000058 | 0.000149 | 0.006192 |
| HB-1 | 7.0 | 0.117773 | 0.070664 | 0.047109 | 0.000924 | 0.000198 | 0.053631 | 0.060606 | 0.035985 | 0.024621 | ... | 0.053922 | 0.000827 | 0.000362 | 0.048375 | 0.228836 | 0.099206 | 0.129630 | 0.000827 | 0.000330 | 0.062569 |
| KNAT7 | 7.0 | 0.481799 | 0.237687 | 0.244111 | 0.356935 | 0.000273 | 0.127985 | 0.017045 | 0.009470 | 0.007576 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.228836 | 0.126984 | 0.101852 | 0.001228 | 0.000291 | 0.060164 |
| WLIM1 | 7.0 | 0.154176 | 0.109208 | 0.044968 | 0.005652 | 0.000237 | 0.061079 | 0.100379 | 0.066288 | 0.034091 | ... | 0.063725 | 0.002138 | 0.000383 | 0.052749 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 |
| HAT22 | 7.0 | 0.092077 | 0.068522 | 0.023555 | 0.002109 | 0.000232 | 0.047758 | 0.522727 | 0.346591 | 0.176136 | ... | 0.011029 | 0.000000 | 0.000221 | 0.014375 | 0.201058 | 0.091270 | 0.109788 | 0.001246 | 0.000329 | 0.056127 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| PLT1 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.006614 | 0.001323 | 0.005291 | 0.000000 | 0.000148 | 0.003532 |
| CIB5 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.005291 | 0.001323 | 0.003968 | 0.000000 | 0.000131 | 0.003218 |
| AT4G34290 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.015873 | 0.002646 | 0.013228 | 0.000000 | 0.000194 | 0.007830 |
| TAF12 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.003968 | 0.002646 | 0.001323 | 0.000000 | 0.000114 | 0.001528 |
| AT5G44260 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | ... | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.002646 | 0.001323 | 0.001323 | 0.000000 | 0.000101 | 0.000960 |
1349 rows × 61 columns
In [ ]: